/*
 * Integer aliases named after their intended bit width.
 *
 * The widths are not selected by compiler switches (the original note
 * observes that short always seems to be 16-bit); each alias is simply bound
 * to the built-in type shown, and nothing here enforces the width.
 */
typedef signed char    signed8;     /* plain char may be unsigned, so say "signed" */
typedef unsigned char  unsigned8;
typedef short          signed16;
typedef unsigned short unsigned16;
typedef long           signed32;
typedef unsigned long  unsigned32;
/*
 * Lookup tables for the parallel-add algorithms.
 *
 * The "rg" tables hold one entry per (red, green) pair and are indexed by
 * (r << 8) | g; the "b" tables hold one entry per blue value.  Each entry is
 * split across two unsigned32 words: the _h array holds the "Hi" word and the
 * _l array holds the "Lo" word.
 */
struct rgb_yuv_data {
    unsigned32 yuv_rg_l[65536L];    /* Lo words, red/green contribution */
    unsigned32 yuv_rg_h[65536L];    /* Hi words, red/green contribution */
    unsigned32 yuv_b_l[256];        /* Lo words, blue contribution */
    unsigned32 yuv_b_h[256];        /* Hi words, blue contribution */
};
typedef struct rgb_yuv_data rgb_yuv_data;
/* Transform matrix: determines the orientation of the RGB color cube within
 * YUV space, and the relative intensities of R, G, and B.
 * NOTE: Whenever the matrix is changed, error analysis has to be done to
 * determine whether 20 bits is still enough accuracy to determine the
 * rounding direction of results! */
/*
 * Fixed-point RGB -> YUV coefficients, stored row-major as a 3x3 matrix.
 * Each row produces one output (Y, U, V in that order) and each column
 * weights one input (R, G, B in that order).  The Y row sums to 2^23
 * (8388608); the U and V rows each sum to zero.
 */
signed32 ml[9] = {
    /*          R           G           B     */
    /* Y */  2508194L,   4924113L,    956301L,
    /* U */ -1415459L,  -2778845L,   4194304L,
    /* V */  4194304L,  -3512206L,   -682098L
};
/* RGBtoYUVInit */
/* Parallel addition, 64-bit math version.
 * Hi format is 00CVVVVVVVVvvvvvvvvvvvvCUUUUUUUU
 * Lo format is uuuuuuuuuuuuYYYYYYYYyyyyyyyyyyyy
 *
 * Allocates an rgb_yuv_data block with TempNewHandle, locks it, and fills
 * its four lookup tables from the coefficient matrix ml[]:
 *   - yuv_rg_h / yuv_rg_l: one entry per (r, g) pair, indexed by
 *     (r << 8) | g, holding the red + green contribution plus round_adjust.
 *   - yuv_b_h / yuv_b_l: one entry per b value, holding the blue
 *     contribution.
 *
 * Returns a pointer to the locked table block, or a null pointer if the
 * allocation fails.  The handle is left locked and is not released here.
 */
void *RGBtoYUVInit(void)
{
    rgb_yuv_data *p;
    Handle h;
    OSErr err;
    unsigned16 r, g, b;
    signed32 index;
    signed32 yl, ul, vl;
    unsigned32 yi;
    signed32 ui, vi;
    /* NOTE(review): added to the rg entries only, presumably so that the sum
     * of one rg entry and one b entry carries the adjustment exactly once --
     * confirm against the conversion routine. */
    unsigned32 round_adjust = 0x7fe;
    unsigned32 lo12 = 0x0fffL;      /* mask for the low 12 bits */
    unsigned32 lo20 = 0xfffffL;     /* mask for the low 20 bits */
    unsigned32 datasize = sizeof(rgb_yuv_data);

    h = TempNewHandle(datasize, &err);
    /* Bail out instead of locking and dereferencing a nil handle when the
     * allocation fails (a zero error code means success). */
    if (h == 0 || err != 0) {
        return 0;
    }
    HLock(h);
    p = *((rgb_yuv_data **) h);

    /* Red/green tables: one entry for every (r, g) combination. */
    for (r=0; r<256; r++) {
        for (g=0; g<256; g++) {
            yl = ml[0] * ((signed32) r)
               + ml[1] * ((signed32) g);
            ul = ml[3] * ((signed32) r)
               + ml[4] * ((signed32) g);
            vl = ml[6] * ((signed32) r)
               + ml[7] * ((signed32) g);
            /* Round to nearest while dropping the low 11 bits. */
            yi = (yl + 1024)>>11L; yi += round_adjust;
            ui = (ul + 1024)>>11L; ui += round_adjust;
            vi = (vl + 1024)>>11L; vi += round_adjust;
            index = (((long)r)<<8L) | ((long)g);
            /* Hi word: 20 bits of V above the top 8 bits of U. */
            (p->yuv_rg_h)[index] =
                ((vi&lo20)<<9L) | ((ui&lo20)>>12L);
            /* Lo word: low 12 bits of U above Y. */
            (p->yuv_rg_l)[index] = ((ui&lo12)<<20L) | yi;
        }
    }

    /* Blue tables: same packing, but without round_adjust. */
    for (b=0; b<256; b++) {
        yl = ml[2] * ((signed32) b);
        ul = ml[5] * ((signed32) b);
        vl = ml[8] * ((signed32) b);
        yi = (yl + 1024)>>11L;
        ui = (ul + 1024)>>11L;
        vi = (vl + 1024)>>11L;
        index = b;
        p->yuv_b_h[index] =
            ((vi&lo20)<<9L) | ((ui&lo20)>>12L);
        /* The U and Y fields occupy disjoint bits here, so + acts like |. */
        p->yuv_b_l[index] = ((ui&lo12)<<20L) + yi;
    }
    return ((void *)p);
}
//RGBtoYUV for 64-bit math version.
// It's hard to read because the instructions were reordered to minimize
// pipeline stalls from result dependencies on the '040.